In [1]:
# Import necessary libraries
import datetime
import itertools
import os
import re
import threading
import time

import altair as alt
import pandas as pd
import plotly.express as px
import requests
import requests_cache
from bs4 import BeautifulSoup
# HTTP session from requests_cache; its cache is named 'project_cache'
session = requests_cache.CachedSession(cache_name='project_cache')
In [2]:
# Cities under study and, position for position, the zip code used to query each one
city_list = (
    'Youngstown',
    'Pittsburgh',
    'Washington DC',
    'Dover',
    'Philadelphia',
    'New York City',
    'Baltimore',
)
zipcodes = (
    '44413',
    '15201',
    '20059',
    '19901',
    '19148',
    '10001',
    '21201',
)
In [3]:
# Build the 30 consecutive days that start on the derailment date (2023-02-03)
K = 30  # window length in days
start_date = datetime.datetime.strptime("2023-02-03", "%Y-%m-%d")
date_generated = pd.date_range(start=start_date, periods=K)

# Same dates rendered as "YYYY-MM-DD" strings
month_after_dates = date_generated.strftime("%Y-%m-%d").tolist()
In [4]:
# Create a list of the 30 days BEFORE the derailment.
# The derailment day itself (2023-02-03) is already the first "after" date, so this
# window has to end on 2023-02-02; starting on 2023-01-05 made it end on 2023-02-03
# and that day was requested (and later plotted) twice.
derailment_date = datetime.datetime.strptime("2023-02-03", "%Y-%m-%d")
K = 30 # 30 days before
start_date = derailment_date - datetime.timedelta(days=K)  # 2023-01-04
date_generated = pd.date_range(start_date, periods=K)

# List of dates as "YYYY-MM-DD" strings
month_before_dates = list(date_generated.strftime("%Y-%m-%d"))
In [5]:
# Make a function that takes in zipcode and date value
def get_data(zipcode, date):
    """Fetch one day of historical AirNow observations around a zip code.

    Parameters
    ----------
    zipcode : str
        Zip code to query.
    date : str
        Day to query; callers in this notebook pass ``YYYY-MM-DD`` strings.
        ``T00-0000`` is appended before the value is sent.

    Returns
    -------
    str
        The response body as text (XML is requested via ``format``).

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status, instead of handing an
        error page to the parser.
    requests.RequestException
        On connection problems or when the 30 second timeout expires.
    """
    # Prefer a key supplied through the environment so the credential does not
    # have to live in the notebook.
    # NOTE(review): the literal fallback only keeps existing runs working; it is
    # a committed secret -- rotate it and remove the fallback once
    # AIRNOW_API_KEY is configured.
    api_key = os.environ.get("AIRNOW_API_KEY", "6DF75E4D-6FD6-4147-80D4-823042F30E27")

    url = "https://www.airnowapi.org/aq/observation/zipCode/historical/"
    # Let requests assemble and escape the query string.
    params = {
        "format": "application/xml",
        "zipCode": zipcode,
        "date": date + "T00-0000",
        "distance": "25",
        "api_key": api_key,
    }

    # Use the notebook-level cached session so re-running the notebook does not
    # repeat requests that were already answered.
    response = session.get(url, params=params, timeout=30)
    response.raise_for_status()

    return response.text
In [6]:
# Make a function that processes the data
def fetch_and_process_data(zipcode, dates):
    """Download observations for every date and tabulate them.

    Calls ``get_data(zipcode, d)`` for each ``d`` in ``dates`` (in order),
    concatenates the response texts, and pulls four fields out of the result.

    Parameters
    ----------
    zipcode : str
        Zip code forwarded to ``get_data``.
    dates : iterable of str
        Dates forwarded to ``get_data``, one request per element.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``AQI``, ``Parameter``, ``Location``; every value is
        a string as captured by the regular expressions (``AQI`` holds digit
        strings, ``Date`` holds ``dd/dd/dddd``-shaped strings).

    Raises
    ------
    ValueError
        Raised by pandas when the four extracted lists differ in length.

    Notes
    -----
    Each field is extracted independently of the others, so rows line up only
    when every observation contributes exactly one match per field. An
    observation whose AQI is not a plain run of digits, or whose date is not
    in the expected shape, is skipped for that field only.
    """
    # Join once rather than growing a string inside the loop; the combined text
    # is parsed as a single document below.
    data_output = ''.join(get_data(zipcode, day) for day in dates)

    # html.parser lower-cases tag names, hence the lowercase lookups.
    xml = BeautifulSoup(data_output, "html.parser")

    def _values(tag_name, pattern):
        # Collect every <tag_name> element, then capture its text with `pattern`.
        return re.findall(pattern, str(xml.find_all(tag_name)))

    # Compile all info into one dataset
    data = {
        'Date': _values('dateobserved', r'<dateobserved>(\d{2}/\d{2}/\d{4})'),
        'AQI': _values('aqi', r'<aqi>(\d+)</aqi>'),
        'Parameter': _values('parametername', r'<parametername>(.*?)<\/parametername>'),
        'Location': _values('reportingarea', r'<reportingarea>(.*?)</reportingarea>'),
    }

    # Convert dataset to data frame (built once, with an explicit column order)
    return pd.DataFrame(data, columns=['Date', 'AQI', 'Parameter', 'Location'])
In [19]:
# Retrieve data for all 7 locations: one "after" frame and one "before" frame per zip code

def _fetch_after_and_before(zipcode):
    """Return (after_frame, before_frame) for one zip code, requesting 'after' first."""
    after_frame = fetch_and_process_data(zipcode, month_after_dates)
    before_frame = fetch_and_process_data(zipcode, month_before_dates)
    return after_frame, before_frame

# Youngstown
youngstown_after, youngstown_before = _fetch_after_and_before(zipcodes[0])

# Pittsburgh
pittsburgh_after, pittsburgh_before = _fetch_after_and_before(zipcodes[1])

# Washington DC
DC_after, DC_before = _fetch_after_and_before(zipcodes[2])

# Dover
dover_after, dover_before = _fetch_after_and_before(zipcodes[3])

# Philadelphia
philadelphia_after, philadelphia_before = _fetch_after_and_before(zipcodes[4])

# New York City
newyork_after, newyork_before = _fetch_after_and_before(zipcodes[5])

# Baltimore
baltimore_after, baltimore_before = _fetch_after_and_before(zipcodes[6])
In [8]:
# Stack every per-city frame (before, then after, city by city) into one table
frames_in_order = [
    youngstown_before, youngstown_after,
    pittsburgh_before, pittsburgh_after,
    DC_before, DC_after,
    dover_before, dover_after,
    philadelphia_before, philadelphia_after,
    newyork_before, newyork_after,
    baltimore_before, baltimore_after,
]
compiled_data = pd.concat(frames_in_order, axis=0)
In [9]:
# Row labels repeat after the concatenation, so renumber them 0..n-1
new_index = range(0, len(compiled_data))
compiled_data_new_index = (
    compiled_data
    .reset_index(drop=True)
    .reindex(new_index)
)

# AQI arrives as str; store it as int
compiled_data_new_index = compiled_data_new_index.assign(
    AQI=compiled_data_new_index['AQI'].astype(int)
)
In [10]:
# Keep only the rows whose Parameter is PM2.5
row_pm25 = compiled_data_new_index[compiled_data_new_index['Parameter'].eq('PM2.5')]

# Order by AQI, then turn the Date strings into datetimes
pm25_sorted_aqi = row_pm25.sort_values(by='AQI').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Chronological order for plotting
pm25_sorted_dates = pm25_sorted_aqi.sort_values(by='Date')
In [16]:
# Keep only the rows whose Parameter is PM10
row_pm10 = compiled_data_new_index[compiled_data_new_index['Parameter'].eq('PM10')]

# Order by AQI, then turn the Date strings into datetimes
pm10_sorted_aqi = row_pm10.sort_values(by='AQI').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Chronological order for plotting
pm10_sorted_dates = pm10_sorted_aqi.sort_values(by='Date')
In [12]:
# Keep only the rows whose Parameter is OZONE
row_ozone = compiled_data_new_index[compiled_data_new_index['Parameter'].eq('OZONE')]

# Order by AQI, then turn the Date strings into datetimes
ozone_sorted_aqi = row_ozone.sort_values(by='AQI').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Chronological order for plotting
ozone_sorted_dates = ozone_sorted_aqi.sort_values(by='Date')
In [13]:
# All parameters together: order by AQI, then turn the Date strings into datetimes
df_sorted_aqi = compiled_data_new_index.sort_values(by='AQI').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Chronological order
df_sorted_dates = df_sorted_aqi.sort_values(by='Date')
In [14]:
# Line chart of PM2.5 AQI over time, one coloured/marked series per location
fig_pm25 = px.line(
    pm25_sorted_dates,
    x="Date",
    y="AQI",
    color="Location",
    symbol="Location",
    title='PM2.5',
)
fig_pm25.update_traces(textposition="bottom right")
fig_pm25.show()
In [17]:
# Line chart of Ozone AQI over time, one coloured/marked series per location
fig_ozone = px.line(
    ozone_sorted_dates,
    x="Date",
    y="AQI",
    color="Location",
    symbol="Location",
    title='Ozone',
)
fig_ozone.update_traces(textposition="bottom right")
fig_ozone.show()
In [18]:
# Line chart of PM10 AQI over time, one coloured/marked series per location
fig_pm10 = px.line(
    pm10_sorted_dates,
    x="Date",
    y="AQI",
    color="Location",
    symbol="Location",
    title='PM10',
)
fig_pm10.update_traces(textposition="bottom right")
fig_pm10.show()